library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(modelr)
library(ggplot2)

# Load the oj data.

# Read oj.csv (path relative to the working directory) into a tibble; readr
# guesses the column types and reports the specification it settled on.
oj <- read_csv(file = "oj.csv")
## Parsed with column specification:
## cols(
##   store = col_integer(),
##   brand = col_character(),
##   week = col_integer(),
##   logmove = col_double(),
##   feat = col_integer(),
##   price = col_double(),
##   AGE60 = col_double(),
##   EDUC = col_double(),
##   ETHNIC = col_double(),
##   INCOME = col_double(),
##   HHLARGE = col_double(),
##   WORKWOM = col_double(),
##   HVAL150 = col_double(),
##   SSTRDIST = col_double(),
##   SSTRVOL = col_double(),
##   CPDIST5 = col_double(),
##   CPWVOL5 = col_double()
## )
# Print the tibble to eyeball the first rows and the parsed column types.
oj
## # A tibble: 28,947 × 17
##    store     brand  week  logmove  feat price     AGE60      EDUC
##    <int>     <chr> <int>    <dbl> <int> <dbl>     <dbl>     <dbl>
## 1      2 tropicana    40 9.018695     0  3.87 0.2328647 0.2489349
## 2      2 tropicana    46 8.723231     0  3.87 0.2328647 0.2489349
## 3      2 tropicana    47 8.253228     0  3.87 0.2328647 0.2489349
## 4      2 tropicana    48 8.987197     0  3.87 0.2328647 0.2489349
## 5      2 tropicana    50 9.093357     0  3.87 0.2328647 0.2489349
## 6      2 tropicana    51 8.877382     0  3.87 0.2328647 0.2489349
## 7      2 tropicana    52 9.294682     0  3.29 0.2328647 0.2489349
## 8      2 tropicana    53 8.954674     0  3.29 0.2328647 0.2489349
## 9      2 tropicana    54 9.049232     0  3.29 0.2328647 0.2489349
## 10     2 tropicana    57 8.613230     0  3.29 0.2328647 0.2489349
## # ... with 28,937 more rows, and 9 more variables: ETHNIC <dbl>,
## #   INCOME <dbl>, HHLARGE <dbl>, WORKWOM <dbl>, HVAL150 <dbl>,
## #   SSTRDIST <dbl>, SSTRVOL <dbl>, CPDIST5 <dbl>, CPWVOL5 <dbl>
# The whole-data-frame plot is left disabled.
#plot(oj)
# List all column names, including the ones the tibble print truncated.
names(oj)
##  [1] "store"    "brand"    "week"     "logmove"  "feat"     "price"   
##  [7] "AGE60"    "EDUC"     "ETHNIC"   "INCOME"   "HHLARGE"  "WORKWOM" 
## [13] "HVAL150"  "SSTRDIST" "SSTRVOL"  "CPDIST5"  "CPWVOL5"
# Per-column summary: quartiles and mean for the numeric columns; brand is
# character, so only its length, class and mode are reported.
summary(oj)
##      store           brand                week          logmove      
##  Min.   :  2.00   Length:28947       Min.   : 40.0   Min.   : 4.159  
##  1st Qu.: 53.00   Class :character   1st Qu.: 70.0   1st Qu.: 8.490  
##  Median : 86.00   Mode  :character   Median :101.0   Median : 9.034  
##  Mean   : 80.88                      Mean   :100.5   Mean   : 9.168  
##  3rd Qu.:111.00                      3rd Qu.:130.0   3rd Qu.: 9.765  
##  Max.   :137.00                      Max.   :160.0   Max.   :13.482  
##       feat            price           AGE60              EDUC        
##  Min.   :0.0000   Min.   :0.520   Min.   :0.05805   Min.   :0.04955  
##  1st Qu.:0.0000   1st Qu.:1.790   1st Qu.:0.12210   1st Qu.:0.14598  
##  Median :0.0000   Median :2.170   Median :0.17065   Median :0.22939  
##  Mean   :0.2373   Mean   :2.282   Mean   :0.17313   Mean   :0.22522  
##  3rd Qu.:0.0000   3rd Qu.:2.730   3rd Qu.:0.21395   3rd Qu.:0.28439  
##  Max.   :1.0000   Max.   :3.870   Max.   :0.30740   Max.   :0.52836  
##      ETHNIC            INCOME          HHLARGE           WORKWOM      
##  Min.   :0.02425   Min.   : 9.867   Min.   :0.01351   Min.   :0.2445  
##  1st Qu.:0.04191   1st Qu.:10.456   1st Qu.:0.09794   1st Qu.:0.3126  
##  Median :0.07466   Median :10.635   Median :0.11122   Median :0.3556  
##  Mean   :0.15556   Mean   :10.617   Mean   :0.11560   Mean   :0.3592  
##  3rd Qu.:0.18776   3rd Qu.:10.797   3rd Qu.:0.13517   3rd Qu.:0.4023  
##  Max.   :0.99569   Max.   :11.236   Max.   :0.21635   Max.   :0.4723  
##     HVAL150            SSTRDIST          SSTRVOL          CPDIST5      
##  Min.   :0.002509   Min.   : 0.1321   Min.   :0.4000   Min.   :0.7725  
##  1st Qu.:0.123486   1st Qu.: 2.7670   1st Qu.:0.7273   1st Qu.:1.6262  
##  Median :0.346154   Median : 4.6507   Median :1.1154   Median :1.9634  
##  Mean   :0.343766   Mean   : 5.0973   Mean   :1.2073   Mean   :2.1204  
##  3rd Qu.:0.528313   3rd Qu.: 6.6506   3rd Qu.:1.5385   3rd Qu.:2.5337  
##  Max.   :0.916700   Max.   :17.8560   Max.   :2.5714   Max.   :4.1079  
##     CPWVOL5       
##  Min.   :0.09456  
##  1st Qu.:0.27167  
##  Median :0.38323  
##  Mean   :0.43891  
##  3rd Qu.:0.56024  
##  Max.   :1.14337
# Scatter of logmove against log(price), one colour per brand.
ggplot(
  data = oj,
  mapping = aes(x = log(price), y = logmove, colour = brand)
) +
  geom_point()

# Disabled experiment, kept for reference:
#   geom_line(oj, aes(x = log(price), y = logmove))

# The same scatter without the brand colouring.
ggplot(data = oj, mapping = aes(x = log(price), y = logmove)) +
  geom_point()

# Pull the log-transformed price and logmove out of the tibble as plain
# vectors so they can be summarised and plotted directly.
price_log <- log(oj[["price"]])
logmove <- oj[["logmove"]]

# Distribution summary of each vector.
summary(logmove)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.159   8.490   9.034   9.168   9.765  13.480
summary(price_log)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.6539  0.5822  0.7747  0.7841  1.0040  1.3530

# Quick scatter with logmove on the x-axis and price_log on the y-axis.
# NOTE(review): the ggplot() scatters and the regressions in this file put
# log(price) on the x side instead -- confirm this orientation is intended.
qplot(logmove, price_log)

# Regress logmove on log(price).

# Baseline model: a single intercept and a single log(price) slope pooled over
# all brands.
model1 <- lm(formula = logmove ~ log(price), data = oj)
# Coefficient table and fit statistics for the baseline model.
summary(model1)
## 
## Call:
## lm(formula = logmove ~ log(price), data = oj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.0441 -0.5853 -0.0330  0.5756  3.7264 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.42342    0.01535  679.04   <2e-16 ***
## log(price)  -1.60131    0.01836  -87.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9071 on 28945 degrees of freedom
## Multiple R-squared:  0.2081, Adjusted R-squared:  0.2081 
## F-statistic:  7608 on 1 and 28945 DF,  p-value: < 2.2e-16
# Plain vector copy of the brand column for the quick qplot() calls in this
# section.
brand <- oj$brand
# Log price by brand: one column of points per brand.
qplot(brand, price_log)

# Distribution of logmove. logmove is continuous, so it is binned into a
# histogram; a bar geom counts every distinct value separately and would draw
# one sliver-thin bar per value. bins = 30 matches ggplot2's default and is
# stated explicitly so no "pick better value" message is emitted.
ggplot(oj, aes(x = logmove)) +
  geom_histogram(bins = 30)

# Number of observations per brand. The plot object keeps the name `c`;
# function calls such as c(1, 2) still resolve to base::c() because R skips
# non-function bindings when looking up a function.
c <- ggplot(data = oj) +
  geom_bar(aes(brand))
c

# logmove by brand, with brand on the y-axis.
qplot(logmove, brand)

# Regress logmove on log(price) with brand interactions.

# Full brand interaction: log(price) * brand expands to both main effects plus
# their interaction, so every brand gets its own intercept and its own
# log(price) slope.
model2 <- lm(formula = logmove ~ log(price) * brand, data = oj)
summary(model2)
## 
## Call:
## lm(formula = logmove ~ log(price) * brand, data = oj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4434 -0.5232 -0.0494  0.4884  3.4901 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 10.95468    0.02070 529.136   <2e-16 ***
## log(price)                  -3.37753    0.03619 -93.322   <2e-16 ***
## brandminute.maid             0.88825    0.04155  21.376   <2e-16 ***
## brandtropicana               0.96239    0.04645  20.719   <2e-16 ***
## log(price):brandminute.maid  0.05679    0.05729   0.991    0.322    
## log(price):brandtropicana    0.66576    0.05352  12.439   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7911 on 28941 degrees of freedom
## Multiple R-squared:  0.3978, Adjusted R-squared:  0.3977 
## F-statistic:  3823 on 5 and 28941 DF,  p-value: < 2.2e-16
# Interaction term only: one shared intercept with a separate log(price) slope
# estimated for each brand.
model2.1 <- lm(formula = logmove ~ log(price):brand, data = oj)
summary(model2.1)
## 
## Call:
## lm(formula = logmove ~ log(price):brand, data = oj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.7859 -0.5188 -0.0570  0.4840  3.5856 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 11.29172    0.01668   676.8   <2e-16 ***
## log(price):branddominicks   -3.92032    0.03042  -128.9   <2e-16 ***
## log(price):brandminute.maid -2.65843    0.02243  -118.5   <2e-16 ***
## log(price):brandtropicana   -2.13001    0.01734  -122.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.801 on 28943 degrees of freedom
## Multiple R-squared:  0.3827, Adjusted R-squared:  0.3826 
## F-statistic:  5981 on 3 and 28943 DF,  p-value: < 2.2e-16
# Additive model: brand-specific intercepts with one common log(price) slope.
model2.2 <- lm(formula = logmove ~ log(price) + brand, data = oj)
summary(model2.2)
## 
## Call:
## lm(formula = logmove ~ log(price) + brand, data = oj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3152 -0.5246 -0.0502  0.4929  3.5088 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      10.82882    0.01453  745.04   <2e-16 ***
## log(price)       -3.13869    0.02293 -136.89   <2e-16 ***
## brandminute.maid  0.87017    0.01293   67.32   <2e-16 ***
## brandtropicana    1.52994    0.01631   93.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7935 on 28943 degrees of freedom
## Multiple R-squared:  0.3941, Adjusted R-squared:  0.394 
## F-statistic:  6275 on 3 and 28943 DF,  p-value: < 2.2e-16
# Diagnostic plots for the additive model.
plot(model2.2)

# Shared intercept with brand-specific slopes, written as a baseline
# log(price) slope plus per-brand offsets. The recorded output has the same
# residuals and fit statistics as model2.1, i.e. it is the same fit in a
# different parameterisation.
model2.3 <- lm(formula = logmove ~ log(price) + log(price):brand, data = oj)
summary(model2.3)
## 
## Call:
## lm(formula = logmove ~ log(price) + log(price):brand, data = oj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.7859 -0.5188 -0.0570  0.4840  3.5856 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 11.29172    0.01668  676.76   <2e-16 ***
## log(price)                  -3.92032    0.03042 -128.88   <2e-16 ***
## log(price):brandminute.maid  1.26188    0.01873   67.38   <2e-16 ***
## log(price):brandtropicana    1.79031    0.01979   90.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.801 on 28943 degrees of freedom
## Multiple R-squared:  0.3827, Adjusted R-squared:  0.3826 
## F-statistic:  5981 on 3 and 28943 DF,  p-value: < 2.2e-16
# Diagnostic plots for the offset-parameterised slope model.
plot(model2.3)

# Regress logmove on log(price) interacted with brand and feat.

# Plain vector copy of the feat column, summarised on its own before it is
# used as an interaction term.
feat <- oj[["feat"]]
summary(feat)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2373  0.0000  1.0000
# Three-way interaction: the intercept and the log(price) slope are each
# allowed to vary by brand, by feat, and by the brand-feat combination.
model3 <- lm(formula = logmove ~ log(price) * brand * feat, data = oj)
summary(model3)
## 
## Call:
## lm(formula = logmove ~ log(price) * brand * feat, data = oj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8893 -0.4290 -0.0091  0.4125  3.2368 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      10.40658    0.02335 445.668  < 2e-16 ***
## log(price)                       -2.77415    0.03883 -71.445  < 2e-16 ***
## brandminute.maid                  0.04720    0.04663   1.012    0.311    
## brandtropicana                    0.70794    0.05080  13.937  < 2e-16 ***
## feat                              1.09441    0.03810  28.721  < 2e-16 ***
## log(price):brandminute.maid       0.78293    0.06140  12.750  < 2e-16 ***
## log(price):brandtropicana         0.73579    0.05684  12.946  < 2e-16 ***
## log(price):feat                  -0.47055    0.07409  -6.351 2.17e-10 ***
## brandminute.maid:feat             1.17294    0.08196  14.312  < 2e-16 ***
## brandtropicana:feat               0.78525    0.09875   7.952 1.90e-15 ***
## log(price):brandminute.maid:feat -1.10922    0.12225  -9.074  < 2e-16 ***
## log(price):brandtropicana:feat   -0.98614    0.12411  -7.946 2.00e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.695 on 28935 degrees of freedom
## Multiple R-squared:  0.5354, Adjusted R-squared:  0.5352 
## F-statistic:  3031 on 11 and 28935 DF,  p-value: < 2.2e-16
# Diagnostic plots for the three-way interaction model.
plot(model3)